import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd   
import pickle

# Load the dataset from the directory passed as the first command-line argument.
laptops_price = pd.read_csv(os.path.join(sys.argv[1], 'laptops_price.csv'))

# Factor applied to the raw 'Price' column before the price-range filter below.
price_conversion_rate = 1e-4

# Conversion factor and selection thresholds, named so the filter reads clearly.
KG_TO_POUNDS = 2.20462
MAX_WEIGHT_POUNDS = 3
SCREEN_SIZE_RANGE = (13, 15)   # inclusive bounds
PRICE_RANGE = (800, 1500)      # inclusive bounds, applied to the converted price

# Strip the literal 'kg' suffix and convert to float; values that still do not
# parse become NaN (errors='coerce').  regex=False makes the replacement a
# plain substring replacement regardless of the pandas version's default.
laptops_price['Weight'] = pd.to_numeric(
    laptops_price['Weight'].str.replace('kg', '', regex=False),
    errors='coerce',
)

# Drop rows whose weight could not be parsed.  The explicit copy makes the
# result an independent frame, so the column assignments below are
# unambiguous and do not trigger pandas' SettingWithCopyWarning.
laptops_price = laptops_price.dropna(subset=['Weight']).copy()

# Convert weight to pounds
laptops_price['Weight_pounds'] = laptops_price['Weight'] * KG_TO_POUNDS

# Apply the price conversion rate
laptops_price['Price'] = laptops_price['Price'] * price_conversion_rate

# Screen size as a float, with the '"' mark removed from the text values.
screen_size = laptops_price['Screen Size'].str.replace('"', '', regex=False).astype(float)

# Filter the dataset: light enough, screen size in range, price in range.
filtered_laptops = laptops_price[
    (laptops_price['Weight_pounds'] <= MAX_WEIGHT_POUNDS)
    & screen_size.between(*SCREEN_SIZE_RANGE)
    & laptops_price['Price'].between(*PRICE_RANGE)
]

# Select the required columns (without 'Storage').  Copy so that later code
# can add columns to filtered_laptops without writing into a derived slice.
filtered_laptops = filtered_laptops[[
    'Manufacturer', 'Model Name', 'Category', 'Screen Size', 'Screen', 'CPU',
    'RAM', 'GPU', 'Operating System', 'Operating System Version', 'Weight',
    'Price',
]].copy()

print(filtered_laptops)
# pickle.dump(filtered_laptops,open("./ref_result/filtered_laptops.pkl","wb"))


import pandas as pd   
import pickle
import matplotlib.pyplot as plt

# The dataset is already loaded above; this section reuses `filtered_laptops`.

# Define powerful CPUs (you can modify the list based on the desired CPUs)
powerful_cpus = [
    "Intel Core i7", "Intel Core i9", "AMD Ryzen 7", "AMD Ryzen 9"
]

# Minimum RAM (in GB) a laptop must have to be counted.
MIN_RAM_GB = 8


def _has_powerful_cpu(cpu_name):
    """Return True when *cpu_name* is a string containing an entry of ``powerful_cpus``.

    Non-string values (e.g. NaN for a missing CPU) yield False instead of
    raising ``TypeError`` from the ``in`` test.
    """
    return isinstance(cpu_name, str) and any(cpu in cpu_name for cpu in powerful_cpus)


# Flag laptops with a powerful CPU.  ``assign`` returns a new frame with the
# extra column, so nothing is written into a slice of another DataFrame.
# ``astype(bool)`` keeps the column usable as a mask even when the frame is empty.
filtered_laptops = filtered_laptops.assign(
    Powerful_CPU=filtered_laptops["CPU"].apply(_has_powerful_cpu).astype(bool)
)

# RAM as a number of GB; unparsable or missing values become NaN and therefore
# fail the ">= MIN_RAM_GB" comparison (same coercing approach as for 'Weight').
ram_gb = pd.to_numeric(
    filtered_laptops["RAM"].str.replace("GB", "", regex=False),
    errors="coerce",
)

# Filter the dataset for powerful CPUs and at least 8GB of RAM
powerful_laptops = filtered_laptops[filtered_laptops["Powerful_CPU"] & (ram_gb >= MIN_RAM_GB)]

# Count the number of laptops by manufacturer
laptop_counts = powerful_laptops["Manufacturer"].value_counts()

# Create a bar chart of the distribution of laptops with a powerful CPU and at least 8GB of RAM
plt.figure(figsize=(10, 6))
if not laptop_counts.empty:
    # pandas' bar plot may raise on an empty Series, so only draw when there
    # is something to show; the titled (empty) figure is still saved below.
    laptop_counts.plot(kind="bar")
plt.title("Distribution of Laptops with Powerful CPU and at least 8GB RAM")
plt.xlabel("Manufacturer")
plt.ylabel("Number of Laptops")
# plt.show()

print(laptop_counts)
# pickle.dump(laptop_counts,open("./ref_result/laptop_counts.pkl","wb"))

# savefig does not create missing directories; make sure the target exists.
os.makedirs('./ref_result', exist_ok=True)
plt.savefig('./ref_result/powerful_laptops_distribution.png')
# plt.show()


import pandas as pd   
import pickle
import matplotlib.pyplot as plt

# The dataset is already loaded above; this section reuses `laptops_price` and `powerful_laptops`.

# Get the indices of the powerful laptops
powerful_laptop_indices = powerful_laptops.index

# 'Storage' was not carried into filtered_laptops, so look it up (together
# with 'GPU') in the full table by index label.
# NOTE(review): the column name ' Storage' has a leading space - presumably it
# matches the CSV header exactly; confirm before "fixing" it.
powerful_laptops_storage_gpu = laptops_price.loc[powerful_laptop_indices, [' Storage', 'GPU']]

# Group the powerful laptops by storage and GPU options
storage_gpu_counts = (
    powerful_laptops_storage_gpu
    .groupby([' Storage', 'GPU'])
    .size()
    .reset_index(name='Count')
)

# Pivot the data to create a stacked bar chart (rows: storage, columns: GPU)
storage_gpu_pivot = storage_gpu_counts.pivot_table(index=' Storage', columns='GPU', values='Count', fill_value=0)

# Create a stacked bar chart comparing storage and GPU options for the selected laptops.
# The axes are created up front so that `ax` exists even when there is nothing
# to plot (pandas raises when asked to plot a frame with no numeric data).
fig, ax = plt.subplots(figsize=(12, 6))
if not storage_gpu_pivot.empty:
    storage_gpu_pivot.plot(kind='bar', stacked=True, ax=ax)
    plt.legend(title='GPU', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title('Storage and GPU Options for Laptops with Powerful CPU and at least 8GB RAM')
plt.xlabel('Storage')
plt.ylabel('Number of Laptops')
plt.xticks(rotation=45)

# Annotate the bar segments with their counts.  Segments without a positive
# height are skipped: labelling them would only scatter "0" labels over the chart.
for p in ax.patches:
    height = p.get_height()
    if not height > 0:
        continue
    x, y = p.get_xy()
    ax.annotate(f'{float(height):g}', (x + p.get_width() / 2, y + height / 2), ha='center', va='center')

# savefig does not create missing directories; make sure the target exists.
os.makedirs('./ref_result', exist_ok=True)
# bbox_inches='tight' keeps the legend, which is anchored outside the axes,
# inside the saved image instead of being clipped at the figure edge.
plt.savefig('./ref_result/bar_chart.png', bbox_inches='tight')
# plt.show()

print(storage_gpu_pivot)
# pickle.dump(storage_gpu_pivot,open("./ref_result/storage_gpu_pivot.pkl","wb"))


